knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.align = 'center') knitr::opts_knit$set(root.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex")
library(data.table) library(ggplot2) library(parallel) library(caret) library(doParallel) library(pbapply)
barcodeSigsBinMat <- readRDS("./analysis/07.VirusClassification/01.NormalBarcodeSignal/20210825/barcodeSigsBinMat.Rds") load(file = "./analysis/07.VirusClassification/03.Merge/01.Classifiers/20210811.RData")
ROC_Tab_2 <- data.frame(predict(Fit1, barcodeSigsBinMat, type = "prob"), pred = predict(Fit1, newdata = barcodeSigsBinMat)) ROC_Tab_2 <- as.data.table(ROC_Tab_2, keep.rownames = "read") colnames(ROC_Tab_2) <- gsub("RTA.", "RTA-", colnames(ROC_Tab_2)) ROC_Tab_2$PP <- apply(ROC_Tab_2[, grepl("RTA", colnames(ROC_Tab_2)), with = F], 1, max)
ROC_Pb <- ROC_Tab_2[pred %in% c("RTA-08", "RTA-27") & PP > 0.3]
aligns <- readRDS("./analysis/07.VirusClassification/02.Prediction/20210825/AlignmentResult.Rds") aligns[, qname := paste0("read_", qname)] aligns <- aligns[!flag %in% c(2048, 2064)] aligns <- unique(aligns[mapq == 60, .(Species, qname)]) aligns <- aligns[qname %in% aligns[, .N, qname][N == 1, qname]] aligns[, table(Species)] aligns[, mean(!Species %in% c("HomSap", "MusMus", "SusScr"))] aligns <- aligns[!Species %in% c("HomSap", "MusMus", "SusScr")]
ROC_Pb <- ROC_Pb[read %in% aligns$qname]
library(ShortRead) fastq <- fread("/mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch2_fq/DRS_virus2.fastq", sep = "\t", header = F) fa <- RNAStringSet(fastq[fastq[, grep("^@", V1)] + 1, ][[1]]) qual <- PhredQuality(fastq[grep("^@", V1) + 3][[1]]) names(qual) <- gsub("^@", "", fastq[grep("^@", V1)][[1]]) names(fa) <- gsub("^@", "", fastq[grep("^@", V1)][[1]]) fastq <- QualityScaledDNAStringSet(x = fa, quality = qual)
Pb_fq_27 <- fastq[paste0("read_", mapply(function(x) x[1], strsplit(names(fastq), " "))) %in% ROC_Pb[pred == "RTA-27", read]] Pb_fq_08 <- fastq[paste0("read_", mapply(function(x) x[1], strsplit(names(fastq), " "))) %in% ROC_Pb[pred == "RTA-08", read]]
writeQualityScaledXStringSet(Pb_fq_08, paste0("./analysis/08.m6A/01.Plasmodium/20211008/02.SplitFastq/Pb_0825_RTA08.fastq")) writeQualityScaledXStringSet(Pb_fq_27, paste0("./analysis/08.m6A/01.Plasmodium/20211008/02.SplitFastq/Pb_0825_RTA27.fastq"))
nanopolish index -d /mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch2_fq/20210825_1639_MN35929_FAQ89985_6914f112/fast5_pass \ -s /mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch2_fq/20210825_1639_MN35929_FAQ89985_6914f112/sequencing_summary_FAQ89985_7e754e64.txt \ /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA08.fastq nanopolish index -d /mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch2_fq/20210825_1639_MN35929_FAQ89985_6914f112/fast5_pass \ -s /mnt/raid61/Personal_data/songjunwei/DRS_RTA/batch2_fq/20210825_1639_MN35929_FAQ89985_6914f112/sequencing_summary_FAQ89985_7e754e64.txt \ /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA27.fastq minimap2 -ax splice -t 10 /mnt/raid61/Personal_data/songjunwei/reference/plasmoDB/fa/PlasmoDB-53_PbergheiANKA_Genome.fasta /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA08.fastq | samtools sort -@ 4 | samtools view -b > /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.bam minimap2 -ax splice -t 10 /mnt/raid61/Personal_data/songjunwei/reference/plasmoDB/fa/PlasmoDB-53_PbergheiANKA_Genome.fasta /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA27.fastq | samtools sort -@ 4 | samtools view -b > /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.bam samtools index /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.bam samtools index /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.bam nanopolish eventalign -t 1 --scale-events -n \ -r /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA08.fastq \ -b /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.bam \ -g /mnt/raid61/Personal_data/tangchao/Temp/reference/PlasmoDB-53_PbergheiANKA_Genome.fasta > /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.eventalign.tsv nanopolish eventalign -t 1 --scale-events -n \ -r /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA27.fastq \ -b /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.bam \ -g /mnt/raid61/Personal_data/tangchao/Temp/reference/PlasmoDB-53_PbergheiANKA_Genome.fasta > /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.eventalign.tsv python /mnt/raid61/Personal_data/tangchao/Document/biosoft/mCaller/mCaller.py \ -m GATC -r /mnt/raid61/Personal_data/tangchao/Temp/reference/PlasmoDB-53_PbergheiANKA_Genome.fasta \ -d /mnt/raid61/Personal_data/tangchao/Document/biosoft/mCaller/r95_twobase_model_NN_6_m6A.pkl \ -e /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.eventalign.tsv \ -f /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA08.fastq -b A python /mnt/raid61/Personal_data/tangchao/Document/biosoft/mCaller/mCaller.py \ -m GATC -r /mnt/raid61/Personal_data/tangchao/Temp/reference/PlasmoDB-53_PbergheiANKA_Genome.fasta \ -d /mnt/raid61/Personal_data/tangchao/Document/biosoft/mCaller/r95_twobase_model_NN_6_m6A.pkl \ -e /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.eventalign.tsv \ -f /mnt/raid61/Personal_data/tangchao/Temp/20211008/Pb_0825_RTA27.fastq -b A python /mnt/raid61/Personal_data/tangchao/Document/biosoft/mCaller/make_bed.py -f /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.eventalign.diffs.6 -d 15 -t 0 python /mnt/raid61/Personal_data/tangchao/Document/biosoft/mCaller/make_bed.py -f /mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.eventalign.diffs.6 -d 5 -t 0
Pb_08 <- fread("/mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.methylation.summary.bed") Pb_27 <- fread("/mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.methylation.summary.bed")
Pb_Meth <- merge(Pb_08, Pb_27, by = c("V1", "V2", "V3", "V4", "V6")) colnames(Pb_Meth) <- c("Seqnames", "Start", "End", "Motif", "Strand", "MethRatio08", "Count08", "MethRatio27", "Count27")
Pb_Meth[order(abs(MethRatio27 - MethRatio08))]
Pb_MethRatio <- melt(Pb_Meth, id.vars = c("Seqnames", "Start", "End", "Motif", "Strand"), measure.vars = c("MethRatio08", "MethRatio27"), variable.name = "Sample", value.name = "MethRatio") Pb_Count <- melt(Pb_Meth, id.vars = c("Seqnames", "Start", "End", "Motif", "Strand"), measure.vars = c("Count08", "Count27"), variable.name = "Sample", value.name = "Count") Pb_MethRatio[, Sample := gsub("MethRatio", "RTA-", Sample)] Pb_Count[, Sample := gsub("Count", "RTA-", Sample)] Pb_Meth2 <- merge(Pb_MethRatio, Pb_Count, by = c("Seqnames", "Start", "End", "Motif", "Strand", "Sample"))
Pb_27_Spe <- Pb_27[!V2 %in% Pb_08$V2] Pb_08_Spe <- Pb_08[!V2 %in% Pb_27$V2]
colnames(Pb_27_Spe) <- c("Seqnames", "Start", "End", "Motif", "MethRatio", "Strand", "Count") colnames(Pb_08_Spe) <- c("Seqnames", "Start", "End", "Motif", "MethRatio", "Strand", "Count")
library(Biostrings) ref <- readDNAStringSet("/mnt/raid61/Personal_data/tangchao/Temp/reference/PlasmoDB-53_PbergheiANKA_Genome.fasta") names(ref) <- mapply(function(x) x[1], strsplit(names(ref), " "))
library(GenomicAlignments) bam08 <- readGAlignments("/mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA08.bam") cov08 <- coverage(bam08[strand(bam08) == "+"]) cov08 <- lapply(seq_along(cov08), function(i) { data.table(Seqnames = names(cov08)[i], POS = seq_len(length(cov08[[i]])), Depth = as.numeric(cov08[[i]])) }) cov08_p <- data.table(do.call(rbind, cov08), Strand = "+") cov08 <- coverage(bam08[strand(bam08) == "-"]) cov08 <- lapply(seq_along(cov08), function(i) { data.table(Seqnames = names(cov08)[i], POS = seq_len(length(cov08[[i]])), Depth = as.numeric(cov08[[i]])) }) cov08_n <- data.table(do.call(rbind, cov08), Strand = "-") cov08 <- rbind(cov08_p, cov08_n) setkey(cov08, Seqnames, POS, Strand)
bam27 <- readGAlignments("/mnt/raid61/Personal_data/tangchao/Temp/20211008/04.minimap2/Pb_0825_RTA27.bam") cov27 <- coverage(bam27[strand(bam27) == "+"]) cov27 <- lapply(seq_along(cov27), function(i) { data.table(Seqnames = names(cov27)[i], POS = seq_len(length(cov27[[i]])), Depth = as.numeric(cov27[[i]])) }) cov27_p <- data.table(do.call(rbind, cov27), Strand = "+") cov27 <- coverage(bam27[strand(bam27) == "-"]) cov27 <- lapply(seq_along(cov27), function(i) { data.table(Seqnames = names(cov27)[i], POS = seq_len(length(cov27[[i]])), Depth = as.numeric(cov27[[i]])) }) cov27_n <- data.table(do.call(rbind, cov27), Strand = "-") cov27 <- rbind(cov27_p, cov27_n) setkey(cov27, Seqnames, POS, Strand)
Pb_27_Spe <- merge(Pb_27_Spe, cov08, by.x = c("Seqnames", "End", "Strand"), by.y = c("Seqnames", "POS", "Strand")) setcolorder(Pb_27_Spe, c("Seqnames", "Start", "End", "Motif", "MethRatio", "Strand", "Count", "Depth")) setnames(Pb_27_Spe, "Depth", "Depth_08")
Pb_08_Spe <- merge(Pb_08_Spe, cov27, by.x = c("Seqnames", "End", "Strand"), by.y = c("Seqnames", "POS", "Strand")) setcolorder(Pb_08_Spe, c("Seqnames", "Start", "End", "Motif", "MethRatio", "Strand", "Count", "Depth")) setnames(Pb_08_Spe, "Depth", "Depth_27") Pb_08_Spe <- Pb_08_Spe[Depth_27 >= 10]
ggplot(Pb_Meth2, aes(x = Sample, y = MethRatio)) + geom_boxplot() + geom_point()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.